# Install and load the packages required by this workflow.
install.packages("devtools")
library(devtools)
install.packages("sdm")
library(sdm)
installAll() # installs the additional packages the sdm package relies on
# NOTE(review): "factoextra" had a trailing space in the original call, which
# would make install.packages() fail to find the package on CRAN.
install.packages(c(
  "raster", "rgdal", "sf", "rgeos", "ggplot2", "cowplot", "plotly", "mapview",
  "ggfortify", "scales", "factoextra", "ggcorrplot", "prettyGraphs", "Rtsne",
  "DT", "parallel", "snow", "sdm", "FactoMineR", "rdist", "usdm", "dplyr",
  "tidyr", "purrr", "purrrlyr", "here", "fs", "stringr", "exactextractr",
  "gdalUtils", "dismo", "rgbif", "boot", "httr"
))
# First we create a folder in our working directory to include input data.
# Make sure the folder that will store the input data exists.
if (!dir_exists('input_data')) {
  dir_create('input_data')
}
Now let’s download data from WorldClim 2.1. Note that R’s default download timeout is 60 seconds, which may not be enough for environmental data, so we can set options(timeout=n), where n is the number of seconds allowed for the download.
# Control the download timeout: if R takes more than 10 minutes (600 seconds)
# to download the data it will stop the download. Increase the timeout if needed.
options(timeout=600)
# Current data
# Files are automatically saved in input_data folder.
# NOTE(review): WorldClim_data() appears to be a project helper, not a CRAN
# function — confirm the helper script/package is loaded before running.
WorldClim_data('current', variable = 'bioc', resolution = 10)
## [1] "The file for current scenario is already downloaded."
# Future data
# Short codes for the General Circulation Models to download.
# NOTE(review): confirm which GCM each code ('cc', 'gg', 'mr', 'uk') maps to
# inside the WorldClim_data() helper.
gcms <- c('cc', 'gg', 'mr', 'uk')
# Download SSP2-4.5 projections for 2090 at 10 arc-min resolution, one file
# per GCM (saved automatically in input_data, per the messages below).
WorldClim_data('future', variable = 'bioc', year = '2090', gcm = gcms, ssp = c('245'), resolution = 10)
## [1] "The file for future scenario (input_data/WorldClim_data_future/cc_ssp245_10m_2090.tif) is already downloaded."
## [1] "The file for future scenario (input_data/WorldClim_data_future/gg_ssp245_10m_2090.tif) is already downloaded."
## [1] "The file for future scenario (input_data/WorldClim_data_future/mr_ssp245_10m_2090.tif) is already downloaded."
## [1] "The file for future scenario (input_data/WorldClim_data_future/uk_ssp245_10m_2090.tif) is already downloaded."
# Downloading data from GBIF
# File is automatically saved in input_data folder
# spp_data <- GBIF_data('Colossoma macropomum')
# Obtaining Natural Earth data:
#shape_study_area <- rnaturalearth::ne_download(scale = 50, type = "rivers_lake_centerlines", category = "physical")
First, we name inputs and outputs, taking care to use the correct file extensions. a) Inputs:
# Shapefile (polygon or lines) delimiting study area.
shape_study_area_file <- here("input_data/shape_study_area/AmazonHydroRivers4.shp")
# Directory name containing current rasters to be rescaled.
folder_current_rasters <- here("input_data/WorldClim_data_current")
# Directory name containing future rasters to be rescaled.
folder_future_rasters <- here("input_data/WorldClim_data_future")
# Name of shapefile (.shp) for the study area to be saved.
output_shp_study_area <- here("output_data/grid/Colossoma_grid.shp")
# Name of R object (.rds) where the current rescaled variables will be saved.
# NOTE(review): despite the "shp" prefix, this path stores an .rds object.
output_shp_current <- here("output_data/WorldClim_data_current_rescaled/Colossoma_current.rds")
# Build scenario identifiers of the form <gcm>_<ssp>_<resolution>_<year>.
scenario_combos <- expand.grid(gcms, c("ssp245"), "10", 2090)
scenarios <- apply(scenario_combos, 1, paste, collapse = "_")
# Paths of the R objects (.rds) where the future rescaled variables will be
# saved, one file per scenario.
future_rds_names <- paste0(
  "output_data/WorldClim_data_future_rescaled/", scenarios, ".rds"
)
output_shp_future <- here(future_rds_names)
# Cell height and width for the grid.
# These values depend on the raster projection: meters if the rasters are in
# UTM, decimal degrees if they are in lat/long (as in WorldClim 2.1). However,
# make_grid() (used to build the grid over the study area) has an `epsg`
# argument that reprojects the spatial data, and the study area's EPSG is
# propagated to the predictor variables. So cell sizes must be expressed in
# the target EPSG even though WorldClim 2.1 itself uses decimal degrees.
# The following values build a grid of 100 x 100 km (EPSG:6933 units are
# meters, as seen in the projected bounding boxes printed below).
epsg <- 6933
cell_width <- 100000
cell_height <- 100000
# Note that setting those values to build a grid smaller than the input
# rasters may generate NaNs, causing some problems.
# If shape_study_area carries attributes you want to keep for rescaling, list
# them here, using names of at most 10 characters (shapefile field limit).
# list() keeps none of the shapefile variables; NULL keeps all of them.
names_var_shp_study_area <- c(
  "NEXT_DOWN", "MAIN_RIV", "LENGTH_KM", "DIST_DN_KM", "DIST_UP_KM",
  "CATCH_SKM", "UPLAND_SKM", "DIS_AV_CMS", "ORD_STRA", "ORD_CLAS", "ORD_FLOW"
)
# The 19 WorldClim bioclimatic variables: bio_1 ... bio_19.
raster_vars <- paste0("bio_", 1:19)
# As in the code above, set which variables in the current rasters to keep for
# rescaling. Use names of at most 10 characters.
# Setting the names to list() will use none of the variables in the shape,
# while setting it to NULL will use all variables.
current_var_names <- c(names_var_shp_study_area, raster_vars) # or NULL
# Variables to keep when rescaling the future rasters.
# We will usually need at least the same variables as in the current scenario
# so the trained models can be projected onto the future data.
future_var_names <- current_var_names
The map of study area needs to be imported to R, so we can create a grid for the study area. This grid will be used for model building and projections.
# Read the study-area shapefile and repair its geometries.
# NOTE(review): repair_shp() is a project helper — confirm it is loaded.
shape_study_area <- shape_study_area_file %>%
st_read() %>%
repair_shp()
## Reading layer `AmazonHydroRivers4' from data source
## `/Users/luizesser/Documents/GitHub/scriptsdm/input_data/shape_study_area/AmazonHydroRivers4.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 107294 features and 14 fields
## Geometry type: LINESTRING
## Dimension: XY
## Bounding box: xmin: -79.37708 ymin: -20.21042 xmax: -48.44375 ymax: 5.139583
## Geodetic CRS: WGS 84
# Build the study-area grid only if it was not created in a previous run;
# otherwise read the cached shapefile.
if (!file_exists(output_shp_study_area)) {
  grid_study_area <- shape_study_area %>%
    make_grid(cell_width, cell_height, names_var_shp_study_area, epsg = epsg) # target EPSG
  # Make sure the destination directory exists before writing.
  output_shp_study_area %>%
    path_dir() %>%
    dir_create()
  grid_study_area %>% st_write(dsn = output_shp_study_area)
} else {
  grid_study_area <- output_shp_study_area %>% st_read()
}
## Reading layer `Colossoma_grid' from data source
## `/Users/luizesser/Documents/GitHub/scriptsdm/output_data/grid/Colossoma_grid.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 642 features and 14 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -7600000 ymin: -2500000 xmax: -4600000 ymax: 7e+05
## Projected CRS: WGS 84 / NSIDC EASE-Grid 2.0 Global
The next step aims to cross data from study area with rasters of variables. We will start with current data.
### Rescaling current data
# Cross the grid with the current rasters only once; reuse the cached .rds on
# later runs.
if (!file_exists(output_shp_current)) {
  grid_current <- grid_study_area %>%
    add_raster(folder_current_rasters, raster_vars)
  # Make sure the destination directory exists before saving.
  output_shp_current %>%
    path_dir() %>%
    dir_create()
  saveRDS(grid_current, output_shp_current)
} else {
  grid_current <- readRDS(output_shp_current)
}
Now within a loop to rescale future variables.
### Rescaling future data
# Rescale each future scenario onto the grid, unless every output file
# already exists from a previous run.
if (!all(file_exists(output_shp_future))) {
  # Create the destination directory once, outside the loop.
  output_shp_future %>%
    path_dir() %>%
    dir_create()
  for (i in seq_along(scenarios)) {
    grid_future <- grid_study_area %>%
      add_raster(folder_future_rasters, future_var_names, scenarios[i])
    # Save each scenario to the output path whose name contains its id.
    saveRDS(grid_future, output_shp_future[grep(scenarios[i], output_shp_future, fixed = TRUE)])
  }
}
# Reload all rescaled future scenarios as a named list.
grid_future <- lapply(output_shp_future, readRDS)
names(grid_future) <- scenarios
print(grid_future[[1]])
## Simple feature collection with 642 features and 33 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -7600000 ymin: -2500000 xmax: -4600000 ymax: 7e+05
## Projected CRS: WGS 84 / NSIDC EASE-Grid 2.0 Global
## First 10 features:
## cell_id next_down main_riv length_km dist_dn_km dist_up_km catch_skm
## 1 14 61289429 60443230 4.293571 4552.214 75.19286 16.57214
## 2 15 61283896 60443230 4.080200 4425.778 204.43800 16.11340
## 3 16 61287342 60443230 7.184286 4180.364 268.33929 25.34786
## 4 42 61269036 60443230 4.506000 4856.160 56.50000 19.38600
## 5 43 61267896 60443230 4.825312 4768.006 166.03281 20.73984
## 6 44 61270528 60443230 4.130400 4565.809 271.66000 16.76870
## 7 45 61276281 60443230 3.645634 4357.465 458.27042 13.64366
## 8 46 61272984 60443230 5.298833 4139.190 420.98000 18.32750
## 9 47 61266914 60443230 6.261935 3980.697 185.00968 30.28581
## 10 48 61266777 60443230 4.982143 3972.986 87.49286 25.37643
## upland_skm dis_av_cms ord_stra ord_clas ord_flow x_centroid y_centroid
## 1 1393.529 3.1836429 4.000000 3.000000 6.000000 -6250000 -2450000
## 2 10805.804 23.1558000 4.320000 2.840000 5.720000 -6150000 -2450000
## 3 6758.746 8.8958929 4.000000 3.571429 5.392857 -6050000 -2450000
## 4 1299.480 2.5990000 4.000000 4.000000 6.000000 -6450000 -2350000
## 5 7113.725 17.9444687 4.812500 3.031250 5.312500 -6350000 -2350000
## 6 16758.634 44.0094300 5.130000 2.500000 5.290000 -6250000 -2350000
## 7 38211.137 82.4315775 5.436620 2.281690 4.816901 -6150000 -2350000
## 8 26306.177 41.4814667 4.900000 3.116667 5.183333 -6050000 -2350000
## 9 5719.913 7.2719355 4.354839 4.548387 6.322581 -5950000 -2350000
## 10 1870.321 0.2140714 4.285714 5.714286 8.357143 -5850000 -2350000
## bio_1 bio_2 bio_3 bio_4 bio_5 bio_6 bio_7 bio_8
## 1 20.24684 13.84176 68.54555 204.1144 29.70086 9.527322 20.17353 20.38651
## 2 26.55916 12.37412 59.97032 292.1918 36.66457 16.007487 20.65708 27.52043
## 3 29.18168 13.49217 58.02394 341.9572 40.75657 17.496642 23.25993 30.51738
## 4 12.79734 16.66331 67.75534 253.9190 23.42181 -1.163224 24.58503 14.64541
## 5 16.63256 16.14285 69.75631 210.2901 26.97781 3.841557 23.13626 16.96295
## 6 22.87325 13.98092 67.31177 216.3198 32.78920 12.011138 20.77806 22.89831
## 7 26.41511 12.70065 62.58097 262.6446 36.53996 16.217002 20.32296 26.91273
## 8 29.69522 13.21537 58.52635 320.3550 41.18508 18.604747 22.58034 30.59864
## 9 30.29418 13.18291 55.77331 333.2756 42.30888 18.682555 23.62632 30.98534
## 10 30.72369 13.52517 56.58782 318.4834 42.81487 18.929925 23.88494 30.94970
## bio_9 bio_10 bio_11 bio_12 bio_13 bio_14 bio_15 bio_16
## 1 17.073420 22.64973 17.458527 574.2856 132.4124 2.353883 97.12121 344.7287
## 2 24.139089 30.01422 22.671924 661.7264 141.1422 5.997128 87.11182 375.7356
## 3 26.611829 33.43858 24.843783 538.5381 111.9852 3.509868 86.21881 301.7986
## 4 8.646783 15.30873 9.223421 378.9018 106.2629 1.331266 111.78573 251.1362
## 5 13.716360 19.08248 13.811794 530.0330 144.3075 1.773711 106.48774 338.8569
## 6 19.962216 25.56179 20.015558 533.9380 131.0196 2.410201 104.74298 340.2003
## 7 24.445456 29.70589 23.050371 681.6485 147.9890 7.481844 85.50568 381.8590
## 8 27.253710 33.83632 25.686743 629.1914 116.2168 8.831161 76.68114 324.2497
## 9 27.885419 34.74869 26.172898 626.2308 114.5745 11.187795 72.26914 311.2446
## 10 28.678516 35.03932 26.795579 711.6006 123.6856 15.287990 66.94596 338.6468
## bio_17 bio_18 bio_19 geometry
## 1 11.353190 168.4213 12.093882 POLYGON ((-6300000 -2500000...
## 2 24.672886 170.3395 40.618524 POLYGON ((-6200000 -2500000...
## 3 16.088348 136.3927 37.042820 POLYGON ((-6100000 -2500000...
## 4 7.090657 208.8186 8.074054 POLYGON ((-6500000 -2400000...
## 5 7.903049 148.0404 11.308218 POLYGON ((-6400000 -2400000...
## 6 10.573087 139.7532 12.505996 POLYGON ((-6300000 -2400000...
## 7 30.472545 170.0650 52.635803 POLYGON ((-6200000 -2400000...
## 8 31.415579 170.2667 56.973362 POLYGON ((-6100000 -2400000...
## 9 37.018825 178.1825 65.417373 POLYGON ((-6e+06 -2400000, ...
## 10 49.273950 205.9026 84.190579 POLYGON ((-5900000 -2400000...
It is necessary to name the output. Be extra careful with extension names. a) Input:
# CSV of species occurrence records prepared earlier.
spp_data <- here('input_data/spp_data.csv')
# Set the path to the output shapefile, which will contain the
# presence/absence matrix.
spp_output <- here("output_data/spp_pa.shp")
It is also necessary to set some other important parameters.
# Species names to be used in the study.
# Names should be identical to those in input_data/spp_data.csv obtained previously.
# Setting this to NULL will use all species.
#spp_names <- especie # or NULL
spp_names <- c("Colossoma.macropomum")
# Convert the occurrence table to a point shapefile within the study grid.
occ_shp <- spp_data %>%
occurrences_to_shapefile(spp_names, grid_study_area)
# Interactive map: grid outline in red plus occurrence points per species.
mapview(grid_study_area[,1], alpha.regions = 0, color = "red", lwd = 1, layer.name = "Study Area", legend=NULL) +
mapview(occ_shp, zcol = "species", layer.name = "Species")
We will mark in grid_study_area which cells have an occurrence record for the studied species.
# Abbreviate species names to at most 10 characters (shapefile field limit).
spp_names_abv <- spp_names %>%
  to_snake_case() %>%
  abbreviate(minlength = 10) %>%
  as.vector()
# Build the presence/absence shapefile only once; reuse the cached copy on
# later runs.
if (!file_exists(spp_output)) {
  grid_matrix_pa <- occ_shp %>%
    occurrences_to_pa_shapefile(grid_study_area, spp_names)
  # Make sure the destination directory exists before writing.
  spp_output %>%
    path_dir() %>%
    dir_create()
  # Keep only the species presence/absence columns.
  grid_matrix_pa %<>% select(all_of(spp_names_abv))
  grid_matrix_pa %>% st_write(dsn = spp_output)
} else {
  grid_matrix_pa <- spp_output %>% st_read()
}
## Reading layer `spp_pa' from data source
## `/Users/luizesser/Documents/GitHub/scriptsdm/output_data/spp_pa.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 642 features and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -7600000 ymin: -2500000 xmax: -4600000 ymax: 7e+05
## Projected CRS: WGS 84 / NSIDC EASE-Grid 2.0 Global
# Species richness per cell: sum presences across species columns and map it.
grid_matrix_pa %>%
as.data.frame() %>%
select(all_of(spp_names_abv)) %>%
rowSums() %>%
as.vector() %>%
richness_map(., grid_study_area)
Check how many records there are for each species:
# Count presence records per species.
presences_number(grid_matrix_pa, spp_names)
Variable selection must be done either with a VIF routine or with a PCA. To perform a VIF routine, we will use only the environmental data from the occurrence data.
# Obtain occurrence data.frame and environmental variables:
df_vif <- get_df_vif(grid_current, grid_matrix_pa)
## var_names not informed. Variables detected: next_down, main_riv, length_km, dist_dn_km, dist_up_km, catch_skm, upland_skm, dis_av_cms, ord_stra, ord_clas, ord_flow, bio_1, bio_2, bio_3, bio_4, bio_5, bio_6, bio_7, bio_8, bio_9, bio_10, bio_11, bio_12, bio_13, bio_14, bio_15, bio_16, bio_17, bio_18, bio_19sp_names not informed. Detected species: clssm_mcrp
# Run VIF routine from usdm package:
# vifcor() excludes variables until no pair correlates above th = 0.5.
vif_bio <- lapply(df_vif, function(x){vifcor(x,th=0.5)})
# Inspect the VIF result for the first species.
vif_bio[[1]]
## 23 variables from the 30 input variables have collinearity problem:
##
## bio_17 bio_16 upland_skm catch_skm bio_15 bio_7 bio_11 bio_6 bio_1 ord_stra bio_12 bio_19 bio_3 dist_up_km bio_2 bio_10 bio_9 bio_14 bio_18 next_down length_km dis_av_cms ord_clas
##
## After excluding the collinear variables, the linear correlation coefficients ranges between:
## min correlation ( bio_4 ~ main_riv ): -0.005198821
## max correlation ( bio_4 ~ ord_flow ): 0.3962631
##
## ---------- VIFs of the remained variables --------
## Variables VIF
## 1 main_riv 1.074130
## 2 dist_dn_km 1.387095
## 3 ord_flow 1.367601
## 4 bio_4 1.453938
## 5 bio_5 1.260669
## 6 bio_8 1.293997
## 7 bio_13 1.186953
# We can exclude variables with high VIF or go to the next step.
# For each species, drop from grid_current the variables vifcor() excluded.
# (lapply + explicit names replaces sapply(..., USE.NAMES = T), which is
# fragile in scripts because its return type depends on the input.)
grid_current <- lapply(names(df_vif), function(sp) {
  select(grid_current, !vif_bio[[sp]]@excluded)
})
names(grid_current) <- names(df_vif)
# Name of the R object (.rds) in which the PCA values will be stored.
shp_pca <- here("output_data/pca/shp_pca.rds")
# Name of the R object (.rds) in which current PCA projection will be stored.
shp_preds <- here("output_data/pca/shp_preds.rds")
# Name of the R object (.rds) in which future PCA projection will be stored.
shp_preds_future <- here("output_data/pca/shp_preds_future.rds")
# Alias for the presence/absence grid built earlier.
shp_matrix_pa <- grid_matrix_pa
# Presence/absence table without the geometry column.
df_species <- shp_matrix_pa %>%
as.data.frame() %>%
select(-c('geometry'))
# Predictor variables read back from the cached current-scenario object.
df_var_preditors <- output_shp_current %>%
get_predictors_as_df()
# Names of variables to be normalized (centered and scaled).
# Normalization can improve the modelling, the calculation of PCAs and the clusterization algorithms used to generate pseudoabsences.
var_normalization <- tolower(current_var_names) # OR: paste0("bio_",1:19)
# Names of variables to be used in PCA.
var_pca_bio <- var_normalization # OR: paste0("bio_",1:19)
# Number of PCA-axes to be retained.
nr_comp_pca_bio <- 4
# Names of variables to be used as predictors when training the models. It can be environmental variables or PCA-axes.
var_predictors <-c("dim_1_bio","dim_2_bio","dim_3_bio","dim_4_bio")
# Join species presence/absence with the candidate predictor variables.
df_potential_predictors <- df_species %>%
bind_cols(df_var_preditors)
# Center and scale the selected variables.
df_potential_predictors <- df_potential_predictors %>%
center_scale(var_normalization)
# Preview the normalized table as an interactive datatable.
df_potential_predictors %>%
head() %>%
round(4) %>%
datatable(options = list(pageLength = 10, scrollX=T))
# Keep only the PCA variables that actually exist among the predictors.
# any_of() selects the listed columns in order and silently skips missing
# ones, replacing the manual var[which(var %in% colnames(...))] idiom and the
# deprecated bare-external-vector selection.
df_var_pca <- df_potential_predictors %>%
  select(any_of(var_pca_bio))
# Preview the PCA input table.
df_var_pca %>%
head() %>%
round(4) %>%
datatable(options = list(pageLength = 10, scrollX=T))
# Correlation plot of the PCA input variables.
df_var_pca %>%
corr_plot()
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
The table that follows shows the values of the axes and variables.
# Recompute the PCA from scratch: delete any stale cached copy first.
if (file_exists(shp_pca)) {
  file_delete(shp_pca)
}
# Compute the PCA over the bioclimatic variables, retaining nr_comp_pca_bio
# axes; "bio" is the label used for the axis names (dim_1_bio, ...).
pca_bio <- df_var_pca %>%
  calc_pca(nr_comp_pca_bio, "bio")
# Show the variable loadings rounded to 4 decimals.
pca_bio$var$loadings %>%
  round(4) %>%
  datatable(options = list(pageLength = 10, scrollX = TRUE))
Save pca with study area:
# Make sure the PCA output directory exists.
if (!dir_exists('output_data/pca/')) {
  dir_create('output_data/pca/')
}
# Attach the individuals' PCA coordinates to the study-area grid and cache it.
pca_coords <- pca_bio$ind$coord %>% as.data.frame()
grid_study_area %>%
  bind_cols(pca_coords) %>%
  saveRDS(file = shp_pca)
Summary of dimensions. The following table shows the correlation between variables and axes (i.e. the contribution of each variable to a given axis).
# Tabular summary of the PCA dimensions (project helper).
pca_bio %>%
dt_pca_summ()
# Scree-style plot of contributions (project helper).
pca_bio %>%
contrib_scree()
# Correlation-style plot of variable contributions (project helper).
pca_bio %>%
contrib_corr()
# Variable contributions per dimension (project helper).
pca_bio %>%
contrib_dims()
PCA-axes considering the quality of variables’ representation.
# PCA axes weighted by the quality of variables' representation (cos2).
pca_bio %>%
pca_cos2()
Quality of variables’ representation on axes.
# Quality of representation (cos2) per dimension.
pca_bio %>%
cos2_dims()
pca_bio %>%
cos2_corr()
# Biplot; second argument collapses df_species rows to a 0/1 indicator via
# rowMeans() + ceiling().
pca_bio %>%
pca_bi_plot(df_species %>% rowMeans() %>% ceiling())
# Criteria for how many axes to retain (broken stick, Kaiser mean, Horn —
# see the printed output below).
pca_bio %>%
comp_pca_retain()
## $broken.stick
## [1] 4
##
## $kaiser.mean
## [1] 7
##
## $horn.p
## [1] 5
# Append the retained PCA axes to the potential-predictors table.
df_potential_predictors <- df_potential_predictors %>%
bind_cols(pca_bio$ind$coord %>% as.data.frame())
# Preview the augmented table.
df_potential_predictors %>%
head() %>%
round(4) %>%
datatable(options = list(pageLength = 10, scrollX=T))
# Cache the chosen predictor columns joined to the grid, on first run only.
if (!file_exists(shp_preds)) {
  df_var_preditors <- df_potential_predictors %>%
    select(all_of(var_predictors))
  grid_study_area %>%
    select(cell_id) %>%
    bind_cols(df_var_preditors) %>%
    saveRDS(file = shp_preds)
}
# Project the fitted PCA onto each future scenario.
pca_future <- proj_pca(pca_bio, grid_future)
# Inspect the first scenario's projection.
pca_future[[1]]
# Cache the future PCA projections.
pca_future %>%
saveRDS(file = shp_preds_future)
As in previous steps, it is necessary to set inputs and outputs, taking extra care with extension names. a) Input: If you want to use PCA:
# To use PCA-axis:
# Keep only the PCA-axis columns as model predictors.
df_var_preditors <- df_potential_predictors[,colnames(df_potential_predictors) %in% var_predictors] # PCA
grid_future <- pca_future # PCA
names(grid_future) <- scenarios
# Inspect the first future scenario.
grid_future[[1]]
If you want to use VIF or raw predictors:
# To use raw predictors (VIF):
# Read the cached current predictors back and drop the cell identifier.
df_var_preditors <- output_shp_current %>%
  get_predictors_as_df() %>%
  select(-c('cell_id'))
#df_var_preditors <- grid_current %>% as.data.frame() %>% select(-c('geometry', 'cell_id', 'y_centroid', 'x_centroid')) # raw predictors
# Reload the rescaled future scenarios as a named list (raw predictors).
grid_future <- lapply(output_shp_future, readRDS) # raw predictors
names(grid_future) <- scenarios
grid_future[[1]]
# Name the directory to save trained models.
folder_models <- here("output_data/models")
# Algorithms for the Species Distribution Modeling; run getmethodNames() to
# list every algorithm available.
algo <- c("rbf", "svm", "mda")
# Threshold selection criterion:
# 1:sp=se, 2:max(se+sp), 3:min(cost), 4:minROCdist, 5:max(kappa),
# 6:max(ppv+npv), 7:ppv=npv, 8:max(NMI), 9:max(ccr), 10: prevalence
thresh_criteria <- 2
n_run <- 10   # runs per algorithm
n_folds <- 4  # cross-validation folds
n_pa <- 1     # pseudoabsence sets
To build models, it is necessary to use pseudoabsences that contrast to presences. Currently, only the ‘random’ method is applied.
# Generate pseudoabsences to contrast with presences.
# NOTE(review): the surrounding text says only the 'random' method is applied,
# but the call uses method="envelope" — confirm which is intended.
df_pseudoabsences <- shp_matrix_pa %>%
pseudoabsences(df_var_preditors, spp_names, method="envelope", folder_models)
It is possible to plot a t-SNE graph to check whether pseudoabsence data clusters into a separate group from presence data.
# t-SNE plot to check whether pseudoabsences cluster apart from presences.
tsne_list <- df_potential_predictors %>%
tsne_plot(df_pseudoabsences, spp_names)
## Warning: Using one column matrices in `filter()` was deprecated in dplyr 1.1.0.
## ℹ Please use one dimensional logical vectors instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(sp)
##
## # Now:
## data %>% select(all_of(sp))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Display the t-SNE plots (one per species).
tsne_list
## [[1]]
As we are using the sdm package, let’s start to build our models by indicating our input data.
# For PCA routine:
# NOTE(review): run ONLY ONE of the two loops below — this one when predictors
# are PCA axes, the next when they come from the VIF routine. As written, both
# execute unconditionally.
for(s in colnames(df_species)){
df_pseudoabsences[[s]] <- df_pseudoabsences[[s]][,var_predictors]
}
# For VIF routine:
# Drop from each species' pseudoabsences the variables excluded by vifcor().
for(s in colnames(df_species)){
df_pseudoabsences[[s]] <- exclude(df_pseudoabsences[[s]], vif_bio[[s]])
}
# Combine presences, predictors and pseudoabsences into sdm-ready input data.
d <- df_species %>%
fit_data(df_var_preditors, df_pseudoabsences)
## Loading required package: dismo
## Loading required package: gbm
## Loaded gbm 2.1.8.1
## Loading required package: tree
## Loading required package: mda
## Loading required package: class
## Loaded mda 0.5-3
## Loading required package: mgcv
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## The following object is masked from 'package:usdm':
##
## Variogram
## The following object is masked from 'package:raster':
##
## getData
## This is mgcv 1.8-41. For overview type 'help("mgcv-package")'.
## Loading required package: glmnet
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loaded glmnet 4.1-6
## Loading required package: earth
## Loading required package: Formula
## Loading required package: plotmo
## Loading required package: plotrix
##
## Attaching package: 'plotrix'
## The following object is masked from 'package:scales':
##
## rescale
## Loading required package: TeachingDemos
##
## Attaching package: 'TeachingDemos'
## The following object is masked from 'package:plotly':
##
## subplot
## Loading required package: rJava
## Loading required package: RSNNS
## Loading required package: Rcpp
## Loading required package: ranger
## Loading required package: randomForest
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ranger':
##
## importance
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
## Loading required package: rpart
## Loading required package: kernlab
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:purrr':
##
## cross
## The following object is masked from 'package:scales':
##
## alpha
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following objects are masked from 'package:raster':
##
## buffer, rotated
# Summary of the sdmdata object for the first species.
d[[1]]
## class : sdmdata
## ===========================================================
## number of species : 1
## species names : clssm_mcrp
## number of features : 7
## feature names : main_riv, dist_dn_km, ord_flow, ...
## type : Presence-Background
## has independet test data? : FALSE
## number of records : 194
## has Coordinates? : FALSE
With the data, we can build our models.
# Train the selected algorithms for each species and save models to disk.
df_species %>%
train_models_to_folder(
d,
algo,
n_run,
n_folds,
folder_models
)
##
## The variable importance for all the models are combined (averaged)...
# Peek at the first few model files saved in the models folder.
model_files <- folder_models %>%
  dir_ls() %>%
  path_file()
model_files %>%
  head() %>%
  as.data.frame()
# Report how many species have trained model files on disk.
n_trained <- folder_models %>%
  dir_ls() %>%
  length()
print(paste0("Number of trained species: ", n_trained))
## [1] "Number of trained species: 1"
How many models failed?
# Report models that failed to train, per species.
d %>%
model_failures(folder_models)
# NOTE(review): spp_names is overwritten here with the abbreviated column
# names of df_species (e.g. "clssm_mcrp"), replacing the full species names.
spp_names <- colnames(df_species)
# Per-model thresholds under the chosen criterion.
thresholds_models <- spp_names %>%
sp_thresh_from_folder(folder_models, thresh_criteria)
# Validation metrics combined across models.
thresholds_models_means <- spp_names %>%
validation_metrics_from_folder(folder_models, thresholds_models)
# Select models by AUC with a 0.8 threshold.
model_selection <- spp_names %>%
validation_metrics_from_folder(folder_models, thresholds_models, stats = 'AUC', th = 0.8)
thresholds_models_means[[1]]
To project our models in space, we need to set where the models were saved (previously set as the folder_models object) and where we want to save our projections.
# Folder where spatial projections will be written.
directory_projections <- here("output_data/projections")
Set up some variables.
# Presence/absence table without the geometry column.
df_pa <- shp_matrix_pa %>%
as.data.frame() %>%
select(-c('geometry'))
# Rebuild potential predictors as presences + chosen predictor columns.
df_potential_predictors <- df_pa %>%
bind_cols(df_var_preditors)
# Future scenarios as plain data.frames, dropping coordinate/geometry columns.
projection_data <- lapply(grid_future, function(x){ x <- as.data.frame(x)
x[!(names(x) %in% c("x_centroid", "y_centroid", "geometry"))]})
# Add the current scenario's predictors to the projection list.
projection_data$current <- df_var_preditors
And finally run our projections.
# Project models in scenarios
# Predict every selected model into each scenario in projection_data, writing
# the results into directory_projections.
df_pa %>% predict_to_folder(scenarios_list=projection_data,
models_folder=folder_models,
pred_methods=model_selection,
thr_criteria=thresh_criteria,
output_folder=directory_projections,
thresholds_models_means=thresholds_models_means
)
# Read back per-species predictions as a named list (one element per species).
# lapply + explicit names replaces sapply(..., simplify=F, USE.NAMES=T),
# which is fragile in scripts and used bare F/T.
predictions_sp <- lapply(spp_names, function(x) sp_predictions_from_folder(x, directory_projections))
names(predictions_sp) <- spp_names
# Average predictions across models for each scenario (plus the current one).
pred_means <- predictions_means(predictions_sp, c(scenarios, 'current'))
# Ensemble across GCMs, per SSP (and for the current scenario).
ensembles <- gcm_ensemble(pred_means, ssp=c('current', 'ssp245'))
# Output ensembles
# Write each ensemble table as a CSV named after its scenario/SSP.
for (i in seq_along(ensembles)) {
  write.csv(
    ensembles[[i]],
    file.path(directory_projections, paste0(names(ensembles)[i], ".csv"))
  )
}
# Map the current-scenario ensembles: projection frequency and the averaged
# presence/absence.
ensemble_map(ensembles$current$current_freq_mean, grid_study_area, "Current", 'Frequence')
ensemble_map(ensembles$current$current_pa_mean, grid_study_area, "Current", 'Presence')